## [1] "macedonia" "united states of america"
## [3] "somaliland" "republic of serbia"
## [5] "swaziland" "united republic of tanzania"
## [7] "the bahamas" "democratic republic of the congo"
## [9] "republic of congo" "northern cyprus"
## [11] "antarctica" "guinea bissau"
## [13] "kosovo"
## region continent population
## 1 china Asia 1433783686
## 2 india Asia 1366417754
## 3 united states of america Americas 329064917
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## `Country/Region` = col_character(),
## `Province/State` = col_character(),
## Lat = col_double(),
## Long = col_double(),
## Confirmed = col_double(),
## Recovered = col_double(),
## Deaths = col_double()
## )
## Parsed with column specification:
## cols(
## Entity = col_character(),
## Date = col_date(format = ""),
## `Source URL` = col_character(),
## `Source label` = col_character(),
## Notes = col_character(),
## `Cumulative total` = col_double(),
## `Daily change in cumulative total` = col_double(),
## `Cumulative total per thousand` = col_double(),
## `Daily change in cumulative total per thousand` = col_double(),
## `3-day rolling mean daily change` = col_double(),
## `3-day rolling mean daily change per thousand` = col_double()
## )
## [1] "macedonia" "myanmar"
## [3] "united states of america" "north korea"
## [5] "solomon islands" "somaliland"
## [7] "republic of serbia" "swaziland"
## [9] "turkmenistan" "east timor"
## [11] "taiwan" "united republic of tanzania"
## [13] "the bahamas" "vanuatu"
## [15] "ivory coast" "democratic republic of the congo"
## [17] "republic of congo" "northern cyprus"
## [19] "czech republic" "antarctica"
## [21] "guinea bissau" "south korea"
## [23] "lesotho"
## # A tibble: 101 x 6
## # Groups: region [1]
## region Date confirmed recovered deaths actives
## <chr> <date> <dbl> <dbl> <dbl> <dbl>
## 1 canada 2020-01-22 0 0 0 0
## 2 canada 2020-01-23 0 0 0 0
## 3 canada 2020-01-24 0 0 0 0
## 4 canada 2020-01-25 0 0 0 0
## 5 canada 2020-01-26 1 0 0 1
## 6 canada 2020-01-27 1 0 0 1
## 7 canada 2020-01-28 2 0 0 2
## 8 canada 2020-01-29 2 0 0 2
## 9 canada 2020-01-30 2 0 0 2
## 10 canada 2020-01-31 4 0 0 4
## # ... with 91 more rows
## [1] "hong kong" "serbia" "united states"
## # A tibble: 3 x 8
## # Groups: region [3]
## Date region confirmed recovered deaths actives cumulative_test
## <date> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2020-01-22 denma~ 0 0 0 0 NA
## 2 2020-01-25 cabo ~ 0 0 0 0 NA
## 3 2020-03-18 unite~ 113 26 0 87 NA
## # ... with 1 more variable: population <dbl>
## [1] "united states" "korea, south"
## # A tibble: 3 x 8
## # Groups: region [3]
## Date.x region confirmed recovered deaths actives population
## <date> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2020-05-01 unite~ 1103461 164015 64943 874503 329064917
## 2 2020-05-01 unite~ 178685 892 27583 150210 67530172
## 3 2020-05-01 italy 207428 78249 28236 100943 60550075
## # ... with 1 more variable: cumulative_test <dbl>
## # A tibble: 3 x 3
## # Groups: region [3]
## region actives ratio_active
## <chr> <dbl> <dbl>
## 1 united states of america 874503 0.266
## 2 united kingdom 150210 0.222
## 3 italy 100943 0.167
## # A tibble: 3 x 3
## # Groups: region [3]
## region actives ratio_active
## <chr> <dbl> <dbl>
## 1 san marino 457 1.35
## 2 qatar 12648 0.447
## 3 andorra 234 0.303
Deeper research on testing: the ratio of confirmed cases to the number of tests performed.
# National (US) data ----
# Load the `state.regions` lookup table and download the New York Times
# state-level COVID-19 time series (columns: date, state, fips, cases, deaths).
data("state.regions")
US.time_covid <- read_csv(
  "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv"
)
## Parsed with column specification:
## cols(
## date = col_date(format = ""),
## state = col_character(),
## fips = col_character(),
## cases = col_double(),
## deaths = col_double()
## )
# Display the state lookup table: region name, abbreviation, and the FIPS code
# in numeric and character form.
state.regions
## region abb fips.numeric fips.character
## 1 alaska AK 2 02
## 2 alabama AL 1 01
## 3 arkansas AR 5 05
## 4 arizona AZ 4 04
## 5 california CA 6 06
## 6 colorado CO 8 08
## 7 connecticut CT 9 09
## 8 district of columbia DC 11 11
## 9 delaware DE 10 10
## 10 florida FL 12 12
## 11 georgia GA 13 13
## 12 hawaii HI 15 15
## 13 iowa IA 19 19
## 14 idaho ID 16 16
## 15 illinois IL 17 17
## 16 indiana IN 18 18
## 17 kansas KS 20 20
## 18 kentucky KY 21 21
## 19 louisiana LA 22 22
## 20 massachusetts MA 25 25
## 21 maryland MD 24 24
## 22 maine ME 23 23
## 23 michigan MI 26 26
## 24 minnesota MN 27 27
## 25 missouri MO 29 29
## 26 mississippi MS 28 28
## 27 montana MT 30 30
## 28 north carolina NC 37 37
## 29 north dakota ND 38 38
## 30 nebraska NE 31 31
## 31 new hampshire NH 33 33
## 32 new jersey NJ 34 34
## 33 new mexico NM 35 35
## 34 nevada NV 32 32
## 35 new york NY 36 36
## 36 ohio OH 39 39
## 37 oklahoma OK 40 40
## 38 oregon OR 41 41
## 39 pennsylvania PA 42 42
## 40 rhode island RI 44 44
## 41 south carolina SC 45 45
## 42 south dakota SD 46 46
## 43 tennessee TN 47 47
## 44 texas TX 48 48
## 45 utah UT 49 49
## 46 virginia VA 51 51
## 47 vermont VT 50 50
## 48 washington WA 53 53
## 49 wisconsin WI 55 55
## 50 west virginia WV 54 54
## 51 wyoming WY 56 56
# Harmonise the NYT table with `state.regions`: rename `state` to `region` and
# lower-case the names so both tables share the same join key.
names(US.time_covid)[names(US.time_covid) == "state"] <- "region"
US.time_covid$region <- tolower(US.time_covid$region)

# Distinct region names present in the NYT data, also kept as a one-column
# data frame. `byrow = TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned, whereas `TRUE` is reserved.
# NOTE(review): df2's column name is auto-generated from the text of this
# expression; nothing in this section reads it by name.
time_state <- unique(US.time_covid$region)
df2 <- data.frame(
  matrix(unlist(time_state), nrow = length(time_state), byrow = TRUE)
)

# Names from `state.regions` that have no counterpart in the NYT data.
# The check is one-directional: regions that occur only in `US.time_covid`
# are not reported here.
matched3 <- inner_join(
  state.regions["region"],
  US.time_covid["region"],
  by = "region"
)
rename3 <- setdiff(state.regions$region, matched3$region)
rename3 # every state.regions name is matched; no state name change is required.
## character(0)
# Drop the FIPS code, save the cleaned state-level series to disk, and show
# three randomly chosen rows as a sanity check.
# write.csv() keeps its default row.names = TRUE, so the file also carries a
# leading row-number column.
US.time_covid_final <- select(US.time_covid, -"fips")
write.csv(US.time_covid_final, file = "US_covid19_timeseries.csv")
US.time_covid_final[sample(nrow(US.time_covid_final), size = 3), ]
## # A tibble: 3 x 4
## date region cases deaths
## <date> <chr> <dbl> <dbl>
## 1 2020-04-15 rhode island 3529 87
## 2 2020-04-13 arizona 3702 122
## 3 2020-04-08 wyoming 230 0
# National death counts from the CDC; landing page:
# "https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm"
# The downloaded table has Group, State and Indicator columns plus death
# counts for COVID-19, pneumonia, influenza and all causes.
N_death_detail <- read_csv(
  "https://data.cdc.gov/api/views/hc4f-j6nb/rows.csv?accessType=DOWNLOAD&bom=true&format=true"
)
## Parsed with column specification:
## cols(
## `Data as of` = col_character(),
## Group = col_character(),
## State = col_character(),
## Indicator = col_character(),
## `Start week` = col_character(),
## `End week` = col_character(),
## `All COVID-19 Deaths (U07.1)` = col_number(),
## `Deaths from All Causes` = col_number(),
## `Percent of Expected Deaths` = col_double(),
## `All Pneumonia Deaths (J12.0-J18.9)` = col_number(),
## `Deaths with Pneumonia and COVID-19 (J12.0-J18.9 and U07.1)` = col_number(),
## `All Influenza Deaths (J09-J11)` = col_number(),
## `Pneumonia, Influenza, and COVID-19 Deaths` = col_number(),
## Footnote = col_character()
## )
# CDC table broken down by race and Hispanic origin: State and Indicator
# columns plus one numeric column per race/ethnicity group.
N_death_race <- read_csv(
  "https://data.cdc.gov/api/views/pj7m-y5uh/rows.csv?accessType=DOWNLOAD&bom=true&format=true"
)
## Parsed with column specification:
## cols(
## `Data as of` = col_character(),
## State = col_character(),
## Indicator = col_character(),
## `Non-Hispanic White` = col_double(),
## `Non-Hispanic Black or African American` = col_double(),
## `Non-Hispanic American Indian or Alaska Native` = col_double(),
## `Non-Hispanic Asian` = col_double(),
## `Hispanic or Latino` = col_double(),
## Other = col_double(),
## Footnote = col_character()
## )
# Source of the detailed US data: https://github.com/nytimes/covid-19-data
# Idea: count and plot, as a time series, how many articles about the coronavirus were released, and how many such articles exist in total: https://developer.nytimes.com/docs/articlesearch-product/1/overview